package devices

import (
	"bytes"
	"encoding/csv"
	"errors"
	"fmt"
	"os/exec"
	"strconv"
	"sync"
	"time"
)

// init registers this plug-in with the main code.
//
// RegisterStartup() sets the function that gotop will call when everything
// else has been done and the plugin should start collecting data. The
// Register*(f) calls that tell gotop which plugin functions to call to
// update data are made later, from startNVidia.
//
// In this plugin, one call to the nvidia program returns *all* the data
// we're looking for, but gotop will call each update function during each
// cycle. This means that the nvidia program would be called 3 (or more)
// times per update, which isn't very efficient. Therefore, we make this
// code more complex to run a job in the background that runs the nvidia
// tool periodically and puts the results into maps; the update functions
// then just sync data from those maps into the return data.
func init() {
	RegisterStartup(startNVidia)
}

// updateNvidiaTemp copies data from the local _temps cache into the passed-in
// return-value map. It is called once per cycle by gotop.
//
// The returned map is a snapshot of the cached errors, copied while
// nvidiaLock is held. Handing out the shared _errors map itself would let the
// caller read it after the lock is released, while the background job may be
// writing to it.
func updateNvidiaTemp(temps map[string]int) map[string]error {
	nvidiaLock.Lock()
	defer nvidiaLock.Unlock()
	for k, v := range _temps {
		temps[k] = v
	}
	errs := make(map[string]error, len(_errors))
	for k, v := range _errors {
		errs[k] = v
	}
	return errs
}

// updateNvidiaMem copies data from the local _mems cache into the passed-in
// return-value map. It is called once per cycle by gotop.
//
// The returned map is a snapshot of the cached errors, copied while
// nvidiaLock is held. Handing out the shared _errors map itself would let the
// caller read it after the lock is released, while the background job may be
// writing to it.
func updateNvidiaMem(mems map[string]MemoryInfo) map[string]error {
	nvidiaLock.Lock()
	defer nvidiaLock.Unlock()
	for k, v := range _mems {
		mems[k] = v
	}
	errs := make(map[string]error, len(_errors))
	for k, v := range _errors {
		errs[k] = v
	}
	return errs
}

// updateNvidiaUsage copies data from the local _cpus cache into the passed-in
// return-value map. It is called once per cycle by gotop. The bool argument
// is ignored.
//
// The returned map is a snapshot of the cached errors, copied while
// nvidiaLock is held. Handing out the shared _errors map itself would let the
// caller read it after the lock is released, while the background job may be
// writing to it.
func updateNvidiaUsage(cpus map[string]int, _ bool) map[string]error {
	nvidiaLock.Lock()
	defer nvidiaLock.Unlock()
	for k, v := range _cpus {
		cpus[k] = v
	}
	errs := make(map[string]error, len(_errors))
	for k, v := range _errors {
		errs[k] = v
	}
	return errs
}

// startNVidia is called once by gotop, and forks a thread to call the nvidia
// tool periodically and update the cached cpu, memory, and temperature
// values that are used by the update*() functions to return data to gotop.
//
// The vars argument contains command-line arguments to allow the plugin
// to change runtime options; the only option currently supported is the
// `nvidia-refresh` arg, which is expected to be a time.Duration value and
// sets how frequently the nvidia tool is called to refresh the date.
func startNVidia(vars map[string]string) error {
	if vars["nvidia"] != "true" {
		return nil
	}
	_, err := exec.Command("nvidia-smi", "-L").Output()
	if err != nil {
		return errors.New(fmt.Sprintf("NVidia GPU error: %s", err))
	}
	_errors = make(map[string]error)
	_temps = make(map[string]int)
	_mems = make(map[string]MemoryInfo)
	_cpus = make(map[string]int)
	_errors = make(map[string]error)
	RegisterTemp(updateNvidiaTemp)
	RegisterMem(updateNvidiaMem)
	RegisterCPU(updateNvidiaUsage)

	nvidiaLock = sync.Mutex{}
	// Get the refresh period from the passed-in command-line/config
	// file options
	refresh := time.Second
	if v, ok := vars["nvidia-refresh"]; ok {
		if refresh, err = time.ParseDuration(v); err != nil {
			return err
		}
	}
	// update once to populate the device names, for the widgets.
	updateNvidia()
	// Fork off a long-running job to call the nvidia tool periodically,
	// parse out the values, and put them in the cache.
	go func() {
		timer := time.Tick(refresh)
		for range timer {
			updateNvidia()
		}
	}()
	return nil
}

// State shared between the background job that runs the nvidia tool and the
// update*() functions that hand the values to gotop when requested.
var (
	// nvidiaLock is held by the update*() functions while they read the
	// caches, and by updateNvidia while it stores freshly parsed rows.
	nvidiaLock sync.Mutex

	// Latest values parsed from the nvidia tool's output, keyed by device
	// name ("<name>.<index>").
	_temps map[string]int
	_cpus  map[string]int
	_mems  map[string]MemoryInfo

	// _errors caches the errors generated by the background job running the
	// nvidia tool; they are returned to gotop when it calls the update*()
	// functions. Keys are device names, or "nvidia" for failures of the
	// tool invocation itself.
	_errors map[string]error
)

// updateNvidia calls the nvidia tool, parses the output, and caches the results
// in the various _* maps. The metric data parsed is: name, index,
// temperature.gpu, utilization.gpu, utilization.memory, memory.total,
// memory.free, memory.used
//
// If this function encounters an error calling `nvidia-smi`, it caches the
// error and returns immediately. We expect exec errors only when the tool
// isn't available, or when it fails for some reason; no exec error cases
// are recoverable. This does **not** stop the cache job; that will continue
// to run and continue to call updateNvidia().
func updateNvidia() {
	bs, err := exec.Command(
		"nvidia-smi",
		"--query-gpu=name,index,temperature.gpu,utilization.gpu,memory.total,memory.used",
		"--format=csv,noheader,nounits").Output()
	if err != nil {
		_errors["nvidia"] = err
		//bs = []byte("GeForce GTX 1080 Ti, 0, 31, 9, 11175, 206")
		return
	}
	csvReader := csv.NewReader(bytes.NewReader(bs))
	csvReader.TrimLeadingSpace = true
	records, err := csvReader.ReadAll()
	if err != nil {
		_errors["nvidia"] = err
		return
	}

	// Ensure we're not trying to modify the caches while they're being read by the update() functions.
	nvidiaLock.Lock()
	defer nvidiaLock.Unlock()
	// Errors during parsing are recorded, but do not stop parsing.
	for _, row := range records {
		// The name of the devices is the nvidia-smi "<name>.<index>"
		name := row[0] + "." + row[1]
		if _temps[name], err = strconv.Atoi(row[2]); err != nil {
			_errors[name] = err
		}
		if _cpus[name], err = strconv.Atoi(row[3]); err != nil {
			_errors[name] = err
		}
		t, err := strconv.Atoi(row[4])
		if err != nil {
			_errors[name] = err
		}
		u, err := strconv.Atoi(row[5])
		if err != nil {
			_errors[name] = err
		}
		_mems[name] = MemoryInfo{
			Total:       1048576 * uint64(t),
			Used:        1048576 * uint64(u),
			UsedPercent: (float64(u) / float64(t)) * 100.0,
		}
	}
}
